#######################################
############# Figure A8 ###############
#######################################

library(ggplot2)
library(haven)
library(pbapply)
library(dplyr)
library(tidyr)
library(gridExtra)
library(scales)
library(fixest)
library(data.table)

# data
dat <- read_dta("Data/municip_main.dta")

# Define the list of control variables
controls <- c(
  "log_MaleToFemaleRatio", "log_shareImmigrants", "log_shareBlueCollar",
  "log_PopDensity", "log_sq_PopDensity", "log65", "log75", "log80",
  "log6575", "log6580", "log7580", "log3", "log_MedianStandardLiving10k",
  "log_basemort", "log_shareUnemp", "delta_Immigrants",
  "delta_Unemp", "delta_logStandardLiving", "log_nl2019", "delta_log_nl"
)

# Only include the relevant columns
dat <- dat %>%
  select(excessmort, turnover, ncandidates, dp, all_of(controls))

# Generate all combinations of control variables
all_combinations <- list()
for (k in 1:length(controls)) {
  all_combinations <- c(all_combinations, combn(controls, k, simplify = FALSE))
}

results <- data.frame(Coefficient = numeric(), PValue = numeric())

# Loop through all combinations with progress bar
setFixest_notes(FALSE) # turn off fixest error messages
results_list <- pblapply(all_combinations, function(combo) {
  formula <- as.formula(
    paste("excessmort ~ turnover +", paste(combo, collapse = " + "), "| dp + ncandidates")
  )
  model <- feols(formula, data = dat)
  coef <- coef(model)["turnover"]
  pval <- pvalue(model)["turnover"]
  return(list(Coefficient = coef, PValue = pval)) # Return as a named list
})

# turn list into dataframe
results <- rbindlist(lapply(results_list, function(x) {
  list(Coefficient = x$Coefficient, PValue = x$PValue)
}))


# Generate histograms for coefficients and p-values 
coefficients_fe <- ggplot(results, aes(x = Coefficient)) +
  geom_histogram(bins = 100, fill = "blue", alpha = 0.7) +
  labs(title = "Coefficients for Turnover", x = "Coefficient", y = "Frequency") +
  theme_minimal() +
  scale_y_continuous(labels = comma_format()) 
print(coefficients_fe)

pvalues_fe <- ggplot(results, aes(x = PValue)) +
  geom_histogram(bins = 100, fill = "red", alpha = 0.7) +
  labs(title = "P-values", x = "P-value", y = "Frequency") +
  theme_minimal() +
  scale_y_continuous(labels = comma_format()) 
print(pvalues_fe)

grid.arrange(coefficients_fe, pvalues_fe, ncol = 2)

# PDF of combined plots
pdf("Figures/figA8.pdf", width = 10, height = 5) 
grid.arrange(coefficients_fe, pvalues_fe, ncol = 2)
dev.off()  

# Summary stats
summary_stats <- results %>%
  summarise(across(everything(), list(Min = ~min(.), Max = ~max(.), Median = ~median(.)))) %>%
  pivot_longer(everything(), names_to = c("Variable", "Statistic"), names_sep = "_") %>%
  pivot_wider(names_from = Statistic, values_from = value)
print(summary_stats)